from htmlstats import Htmlchuli
from urlclass import Quchong


# Seed URLs from which the crawl starts.
e = ['http://toutiao.hc360.com/']
# Links returned by html_etat() for the most recently fetched page.
b = []
# Progress counter used in the log messages. It starts at 1 and is
# incremented before each message, so the seed page is reported as 2.
c = 1
# Crawling stops once the progress counter reaches this value.
MAX_PAGES = 200000
# File handed to Quchong for URL de-duplication.
URL_MD5_PATH = './data/url/urlmd5.json'

for x in e:
    # Process the seed page and collect its outgoing links.
    a = Htmlchuli(x, ci=3, n=5, timeout=60)
    g = a.html_if_key()
    b = a.html_etat()
    c += 1
    print(g)
    print("目前进行到第：" + str(c) + "个")
    while c < MAX_PAGES:
        # Set when this pass fetches at least one new page. Without it the
        # loop would spin forever once every link in `b` is skipped (or `b`
        # is empty), because `c` would never advance.
        fetched_new = False
        # The `for` keeps iterating the list object it started with, even
        # though `b` is rebound inside the body; the links of the last page
        # fetched in this pass are scanned on the next `while` pass.
        for y in b:
            if 'javascript' in y:
                continue
            # A truthy result means the URL should be fetched; a falsy one
            # is reported below as "already crawled".
            qu = Quchong(y, URL_MD5_PATH).urlquchong()
            if not qu:
                print('-'*50 + '当前链接已经爬取过了！' + '-'*50)
                continue
            a = Htmlchuli(y, ci=3, n=5, timeout=60)
            g = a.html_if_key()
            b = a.html_etat()
            c += 1
            fetched_new = True
            print(g)
            print("目前进行‘附表’到第：" + str(c) + "个")
            if c >= MAX_PAGES:
                # Honour the cap mid-pass instead of overshooting it.
                break
        if not fetched_new:
            # Nothing new was reachable from the current link list; another
            # pass over the same list would give the same result.
            break
